package org.bjtu.word2vec;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import cn.edu.bjtu.abstractimpl.analyzer.AnsjDocumentAnalyzer;
import cn.edu.bjtu.interfaces.segment.DocumentSegmentation;
import cn.edu.bjtu.interfaces.vector.IDocumentVector;
import cn.edu.bjtu.model.word2vec.Word2VecForTransformingDocs;
import cn.edu.bjtu.model.word2vec.domain.Word2Vec;
import cn.edu.bjtu.tools.ModelInfo;
import cn.edu.bjtu.tools.message.ModelInfoMessage;
import net.sf.json.JSONObject;

/**
 * Command-line style utility that turns documents into vectors with a Word2Vec
 * model and writes the vectors to a text file.
 *
 * <p>Three paths are read from {@code appConfig.properties} on the classpath
 * when the class is loaded: {@code modelpathtest} (model file),
 * {@code trainfile} (training corpus) and {@code transformfiletest} (output
 * file). A key that is absent keeps the default declared on the field.
 */
public class DocsTransform {
	/** Location of the Word2Vec model file; overridden by {@code modelpathtest}. */
	private static String RESOURCE_MODEL = "/home/liyao/tools/dataSets/test/wordsModel";
	/** Training corpus used to build the model; overridden by {@code trainfile}. */
	private static String TRAIN_FILE = "";
	/** Output file for the transformed vectors; overridden by {@code transformfiletest}. */
	private static String TRNSFORMFILE = "";
	private static final Log LOG = LogFactory.getLog(DocsTransform.class);

	static {
		Properties prop = new Properties();
		InputStream inStream = Word2VecForTransformingDocs.class.getClassLoader()
				.getResourceAsStream("appConfig.properties");
		if (inStream == null) {
			// getResourceAsStream returns null for a missing resource; loading
			// from null would abort class initialization with an NPE.
			LOG.error("Loading config file is fault: appConfig.properties not found on classpath");
		} else {
			try {
				prop.load(inStream);
			} catch (IOException e) {
				LOG.error("Loading config file is fault", e);
			} finally {
				try {
					inStream.close();
				} catch (IOException e) {
					LOG.warn("Closing config file failed", e);
				}
			}
		}
		// Fall back to the declared defaults instead of overwriting them with null.
		RESOURCE_MODEL = prop.getProperty("modelpathtest", RESOURCE_MODEL);
		TRAIN_FILE = prop.getProperty("trainfile", TRAIN_FILE);
		TRNSFORMFILE = prop.getProperty("transformfiletest", TRNSFORMFILE);
	}

	/**
	 * Transforms the documents found at {@code docsFilePath} into vectors and
	 * writes them to the configured output file.
	 *
	 * <p>If the model file does not exist yet, {@link #word2VecmModeling()} is
	 * invoked first. A directory is handed to {@code getDocuments}, a regular
	 * file to {@code getDoucument}.
	 *
	 * @param docsFilePath path of a directory or of a single file to transform
	 * @throws FileNotFoundException if {@code docsFilePath} is neither an
	 *             existing directory nor an existing regular file
	 * @throws IOException if transforming the documents or writing the output fails
	 */
	public static void docsToVecs(String docsFilePath) throws IOException {
		File source = new File(docsFilePath);
		boolean isDirectory = source.isDirectory();
		// Validate the input before (possibly) training a model, so a bad path
		// fails fast instead of ending in a NullPointerException while writing.
		if (!isDirectory && !source.isFile()) {
			throw new FileNotFoundException(
					"documents path is neither an existing directory nor a file: " + docsFilePath);
		}

		if (!modelIsExist()) {
			String modelingResult = word2VecmModeling();
			if (!modelIsExist()) {
				// Keep going as before, but make the failed modeling attempt visible.
				LOG.error("Word2Vec model is still missing at " + RESOURCE_MODEL
						+ " after modeling, result: " + modelingResult);
			}
		}

		DocumentSegmentation docSeg = new AnsjDocumentAnalyzer();
		Word2VecForTransformingDocs doctran = Word2VecForTransformingDocs.getInstance(docSeg);
		List<IDocumentVector> vectors;
		if (isDirectory) {
			vectors = doctran.transform(doctran.getDocuments(docsFilePath));
		} else {
			vectors = doctran.transform(doctran.getDoucument(docsFilePath));
		}
		writeToFile(vectors);
	}

	/**
	 * Tells whether the model file exists.
	 *
	 * @return {@code true} if a file or directory exists at the configured model path
	 */
	private static boolean modelIsExist() {
		return new File(RESOURCE_MODEL).exists();
	}

	/**
	 * Builds the Word2Vec model from the configured training file and saves it
	 * to the configured model path.
	 *
	 * @return a JSON string produced from a {@code ModelInfoMessage}: code 200
	 *         with model info on success, code 2 when saving fails, code 3 when
	 *         learning fails, code 404 when the training file does not exist;
	 *         the empty string when an {@link IOException} occurs (the
	 *         exception is logged)
	 */
	public static String word2VecmModeling() {
		File trainFile = new File(TRAIN_FILE);
		if (!trainFile.exists()) {
			return JSONObject.fromObject(new ModelInfoMessage(404, "trainfile isn't exist")).toString();
		}

		String result = "";
		Word2Vec learn = new Word2Vec();
		try {
			long learnStart = System.currentTimeMillis();
			boolean learned = learn.learnFile(trainFile);
			long learnMillis = System.currentTimeMillis() - learnStart;

			ModelInfoMessage modelInfoMessage;
			if (!learned) {
				modelInfoMessage = new ModelInfoMessage(3, "leaning trainFile is fail at " + TRAIN_FILE);
			} else {
				long saveStart = System.currentTimeMillis();
				boolean saved = learn.saveModel(new File(RESOURCE_MODEL));
				long saveMillis = System.currentTimeMillis() - saveStart;
				if (saved) {
					modelInfoMessage = new ModelInfoMessage(200, "");
					// Reported duration is learning time plus saving time.
					modelInfoMessage.setModelInfo(
							new ModelInfo(trainFile.getName(), learnMillis + saveMillis, "file_only"));
				} else {
					modelInfoMessage = new ModelInfoMessage(2, "saving mode fail at" + RESOURCE_MODEL);
				}
			}
			result = JSONObject.fromObject(modelInfoMessage).toString();
		} catch (IOException e) {
			// The return value stays "" as before; the failure is logged with its cause.
			LOG.error("Word2Vec modeling failed for train file " + TRAIN_FILE, e);
		}
		return result;
	}

	/**
	 * Writes one line per document to the configured output file, encoded as
	 * UTF-8: the document label followed by space-separated
	 * {@code index:value} pairs, with indices starting at 1.
	 *
	 * @param iter the document vectors to write
	 * @throws IOException if the output file cannot be opened or written
	 */
	private static void writeToFile(List<IDocumentVector> iter) throws IOException {
		BufferedWriter writer = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(TRNSFORMFILE), "utf-8"));
		try {
			for (IDocumentVector doc : iter) {
				StringBuilder sb = new StringBuilder();
				sb.append(doc.getDocLabel()).append(" ");
				for (int i = 0; i < doc.getVector().size(); i++) {
					sb.append(i + 1).append(":").append(doc.getVector().get(i)).append(" ");
				}
				writer.write(sb.toString());
				writer.newLine();
			}
		} finally {
			// Close even when a write fails so the file handle is not leaked.
			writer.close();
		}
	}

	/**
	 * Runs the transformation on a fixed directory and prints the elapsed
	 * time in milliseconds.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			long start = System.currentTimeMillis();
			docsToVecs("/home/liyao/tools/TBD/userData/");
			long end = System.currentTimeMillis();
			System.out.println("time is" + (end - start));
		} catch (IOException e) {
			LOG.error("Transforming documents failed", e);
		}
	}
}
